package com.jxsd.edu.cn.tools;

import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

import com.jxsd.edu.cn.tools.ParseTool;


/**
 * Crawls a page to obtain the URL of the detailed book-information page.
 */
public class BookUrlCrawler 
{
	/**
	 * Obtains the HTML for {@code url} through {@code Example.getHtml}, looks for
	 * nodes whose {@code bind} attribute equals {@code searchHistoryOp}, and hands
	 * the text of the first match to {@code ParseTool.getString}.
	 *
	 * @param url the address passed to {@code Example.getHtml}
	 * @return the string produced by {@code ParseTool.getString} for the first
	 *         matching node, or {@code null} when no HTML was obtained, no node
	 *         matched, or the parser raised a {@link ParserException}
	 */
	public String crawling(String url) 
	{
		Example  e=new  Example();
		String   inputHTML=e.getHtml(url);

		// Without any HTML there is nothing to parse; report "no URL" the same
		// way the other failure paths do instead of failing inside the parser.
		if(inputHTML==null || inputHTML.isEmpty())
		{
			return null;
		}

		String   nextUrl=null;
		try 
		{
			Parser   parse=new  Parser();
			parse.setInputHTML(inputHTML);

			NodeFilter   filter=new   HasAttributeFilter("bind","searchHistoryOp");
			NodeList      nodes=parse.extractAllNodesThatMatch(filter);

			// A non-null but empty list has no first element to read, so the
			// size must be checked before touching index 0.
			if(nodes!=null && nodes.size()>0)
			{
				Node  textnode=nodes.elementAt(0);

				nextUrl=ParseTool.getString(textnode.getText());
				System.out.println("gold  Url:"+nextUrl);
			}
		} 
		catch (ParserException e1)
		{
			// Parsing failure is non-fatal here: the trace is printed and the
			// method falls through to return null.
			e1.printStackTrace();
		}

		return  nextUrl;
	}
}
